Purpose:
Runs survival analysis models using splicing cluster assignment and 1) single exon splicing burden index (SBI) or 2) KEGG Spliceosome GSVA scores as a predictor
Uses a wrapper function (survival_analysis) from utils
folder.
Load packages, set directory paths and call setup script
library(tidyverse)
library(survival)
library(ggpubr)
library(ggplot2)
library(patchwork)
root_dir <- rprojroot::find_root(rprojroot::has_dir(".git"))
data_dir <- file.path(root_dir, "data")
analysis_dir <- file.path(root_dir, "analyses", "survival")
input_dir <- file.path(analysis_dir, "results")
results_dir <- file.path(analysis_dir, "results")
plot_dir <- file.path(analysis_dir, "plots")
# If the input and results directories do not exist, create it
if (!dir.exists(results_dir)) {
dir.create(results_dir, recursive = TRUE)
}
source(file.path(analysis_dir, "util", "survival_models.R"))
Set metadata and cluster assignment file paths
metadata_file <- file.path(input_dir, "splicing_indices_with_survival.tsv")
cluster_file <- file.path(root_dir, "analyses",
"sample-psi-clustering", "results",
"sample-cluster-metadata-top-5000-events-stranded.tsv")
kegg_scores_stranded_file <- file.path(root_dir, "analyses",
"sample-psi-clustering", "results",
"gsva_output_stranded.tsv")
Wrangle data Add cluster assignment and spliceosome gsva scores to
metadata and define column lgg_group (LGG or
non_LGG)
metadata <- read_tsv(metadata_file)
Rows: 707 Columns: 24
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (9): Kids_First_Biospecimen_ID, Histology, Kids_First_Participant_ID, molecular_subtype, extent_of_tumor_...
dbl (15): Total, AS_neg, AS_pos, AS_total, SI_A3SS, SI_A5SS, SI_RI, SI_SE, SI_Total, EFS_days, OS_days, age_at...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
clusters <- read_tsv(cluster_file) %>%
dplyr::rename(Kids_First_Biospecimen_ID = sample_id)
Rows: 752 Columns: 8
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (6): sample_id, plot_group, plot_group_hex, RNA_library, molecular_subtype, plot_group_n
dbl (2): cluster, group_n
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
gsva_scores <- read_tsv(kegg_scores_stranded_file) %>%
dplyr::filter(geneset == "KEGG_SPLICEOSOME") %>%
dplyr::rename(spliceosome_gsva_score = score)
Rows: 23312 Columns: 3
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (2): sample_id, geneset
dbl (1): score
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# how many clusters?
n_clust <- length(unique(clusters$cluster))
metadata <- metadata %>%
right_join(clusters %>% dplyr::select(Kids_First_Biospecimen_ID,
cluster)) %>%
left_join(gsva_scores %>% dplyr::select(sample_id,
spliceosome_gsva_score),
by = c("Kids_First_Biospecimen_ID" = "sample_id")) %>%
dplyr::mutate(cluster = glue::glue("Cluster {cluster}")) %>%
dplyr::mutate(cluster = fct_relevel(cluster,
paste0("Cluster ", 1:n_clust))) %>%
dplyr::mutate(lgg_group = case_when(
Histology == "Low-grade glioma" ~ "LGG",
TRUE ~ "non-LGG"
)) %>%
dplyr::mutate(SBI = SI_Total * 10) %>%
dplyr::mutate(age_at_diagnosis_years = age_at_diagnosis_days/365.25)
Joining with `by = join_by(Kids_First_Biospecimen_ID)`
Generate coxph models including extent of tumor resection, lgg group, and cluster assignment and SBI as covariates
add_model_os <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
terms = "extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_years+SBI",
file.path(results_dir, "cox_OS_additive_terms_resection_lgg_group_cluster_SBI.RDS"),
"multivariate",
years_col = "OS_years",
status_col = "OS_status")
forest_os <- plotForest(readRDS(file.path(results_dir, "cox_OS_additive_terms_resection_lgg_group_cluster_SBI.RDS")))
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
forest_os
ggsave(file.path(plot_dir, "forest_add_OS_resection_lgg_group_cluster_assignment_SBI.pdf"),
forest_os,
width = 10, height = 6, units = "in",
device = "pdf")
add_model_efs <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
terms = "extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_years+SBI",
file.path(results_dir, "cox_EFS_additive_terms_resection_lgg_group_cluster_SBI.RDS"),
"multivariate",
years_col = "EFS_years",
status_col = "EFS_status")
forest_efs <- plotForest(readRDS(file.path(results_dir, "cox_EFS_additive_terms_resection_lgg_group_cluster_SBI.RDS")))
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
forest_efs
ggsave(file.path(plot_dir, "forest_add_EFS_resection_lgg_group_cluster_assignment_SBI.pdf"),
forest_efs,
width = 10, height = 6, units = "in",
device = "pdf")
repeat analysis, replacing SBI with KEGG spliceosome gsva score
add_model_os <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
terms = "extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_years+spliceosome_gsva_score",
file.path(results_dir, "cox_OS_additive_terms_resection_lgg_group_cluster_spliceosome_score.RDS"),
"multivariate",
years_col = "OS_years",
status_col = "OS_status")
forest_os <- plotForest(readRDS(file.path(results_dir, "cox_OS_additive_terms_resection_lgg_group_cluster_spliceosome_score.RDS")))
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
forest_os
ggsave(file.path(plot_dir, "forest_add_OS_resection_lgg_group_cluster_assignment_spliceosome_score.pdf"),
forest_os,
width = 10, height = 6, units = "in",
device = "pdf")
models <- c("spliceosome_gsva_score", "SBI")
for (each in models) {
int_model_efs <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
terms = paste0("extent_of_tumor_resection+lgg_group+cluster*", each, "+age_at_diagnosis_years"),
file.path(results_dir, paste0("cox_EFS_interaction_terms_resection_lgg_group_cluster_", each, ".RDS")),
"multivariate",
years_col = "EFS_years",
status_col = "EFS_status")
int_forest_efs <- plotForest(readRDS(file.path(results_dir, paste0("cox_EFS_interaction_terms_resection_lgg_group_cluster_", each, ".RDS"))))
int_forest_efs
ggsave(file.path(plot_dir, paste0("forest_int_EFS_resection_lgg_group_cluster_assignment_", each, ".pdf")),
int_forest_efs,
width = 10, height = 6, units = "in",
device = "pdf")
int_model_os <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
terms = paste0("extent_of_tumor_resection+lgg_group+cluster*", each, "+age_at_diagnosis_years"),
file.path(results_dir, paste0("cox_OS_interaction_terms_resection_lgg_group_cluster_", each, ".RDS")),
"multivariate",
years_col = "OS_years",
status_col = "OS_status")
int_forest_os <- plotForest(readRDS(file.path(results_dir, paste0("cox_OS_interaction_terms_resection_lgg_group_cluster_", each, ".RDS"))))
int_forest_os
ggsave(file.path(plot_dir, paste0("forest_int_OS_resection_lgg_group_cluster_assignment_", each, ".pdf")),
int_forest_os,
width = 10, height = 6, units = "in",
device = "pdf")
}
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
add_model_efs <- fit_save_model(metadata[!metadata$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
terms = "extent_of_tumor_resection+lgg_group+cluster+age_at_diagnosis_years+spliceosome_gsva_score",
file.path(results_dir, "cox_EFS_additive_terms_resection_lgg_group_cluster_spliceosome_score.RDS"),
"multivariate",
years_col = "EFS_years",
status_col = "EFS_status")
forest_efs <- plotForest(readRDS(file.path(results_dir, "cox_EFS_additive_terms_resection_lgg_group_cluster_spliceosome_score.RDS")))
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
forest_efs
ggsave(file.path(plot_dir, "forest_add_EFS_resection_lgg_group_cluster_assignment_spliceosome_score.pdf"),
forest_efs,
width = 10, height = 6, units = "in",
device = "pdf")
Subset metadata for LGG, and only include clusters with
>= 10 samples
lgg <- metadata %>%
dplyr::filter(Histology == "Low-grade glioma") %>%
dplyr::mutate(cluster = factor(cluster)) %>%
dplyr::mutate(mol_sub_group = fct_relevel(mol_sub_group, c("Wildtype", "BRAF V600E", "BRAF fusion",
"Other alteration", "SEGA"
)))
retain_clusters_lgg <- lgg %>%
dplyr::count(cluster) %>%
filter(n >= 10) %>%
pull(cluster)
lgg <- lgg %>%
filter(cluster %in% retain_clusters_lgg) %>%
dplyr::mutate(cluster = factor(cluster))
Generate coxph models including covariates
extent_of_tumor_resection, mol_sub_group,
cluster, and SBI and plot
# identify LGG clusters
lgg_clusters <- metadata %>%
filter(lgg_group == "LGG") %>%
mutate(cluster = as.integer(gsub("cluster", "", cluster))) %>%
pull(cluster) %>%
sort() %>%
unique()
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `cluster = as.integer(gsub("cluster", "", cluster))`.
Caused by warning:
! NAs introduced by coercion
add_model_lgg_efs <- fit_save_model(lgg[!lgg$cluster %in% lgg_clusters & !lgg$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
terms = "extent_of_tumor_resection+mol_sub_group+cluster+age_at_diagnosis_years+SBI",
file.path(results_dir, "cox_lgg_EFS_additive_terms_resection_subtype_cluster_SBI.RDS"),
"multivariate",
years_col = "EFS_years",
status_col = "EFS_status")
Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
Loglik converged before variable 6,8 ; coefficient may be infinite.
forest_lgg_efs <- plotForest(readRDS(file.path(results_dir, "cox_lgg_EFS_additive_terms_resection_subtype_cluster_SBI.RDS")))
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
forest_lgg_efs
ggsave(file.path(plot_dir, "forest_add_EFS_LGG_resection_subtype_cluster_assignment_SBI.pdf"),
forest_lgg_efs,
width = 10, height = 6, units = "in",
device = "pdf")
repeat analysis replacing SBI with
spliceosome_gsva_score
add_model_lgg_efs <- fit_save_model(lgg[!lgg$cluster %in% lgg_clusters & !lgg$extent_of_tumor_resection %in% c("Not Reported", "Unavailable"),],
terms = "extent_of_tumor_resection+mol_sub_group+cluster+age_at_diagnosis_years+spliceosome_gsva_score",
file.path(results_dir, "cox_lgg_EFS_additive_terms_resection_subtype_cluster_spliceosome_score.RDS"),
"multivariate",
years_col = "EFS_years",
status_col = "EFS_status")
Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
Loglik converged before variable 6,8 ; coefficient may be infinite.
forest_lgg_efs <- plotForest(readRDS(file.path(results_dir, "cox_lgg_EFS_additive_terms_resection_subtype_cluster_spliceosome_score.RDS")))
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
forest_lgg_efs
ggsave(file.path(plot_dir, "forest_add_EFS_LGG_resection_subtype_cluster_assignment_spliceosome_score.pdf"),
forest_lgg_efs,
width = 10, height = 6, units = "in",
device = "pdf")
Subset metadata for HGG and retain cluster with n >=
10
hgg <- metadata %>%
dplyr::filter(Histology %in% c("Other high-grade glioma", "DIPG or DMG")) %>%
dplyr::mutate(cluster = factor(cluster)) %>%
dplyr::mutate(mol_sub_group = fct_relevel(mol_sub_group, c("HGG, H3 wildtype", "HGG, H3 wildtype, TP53",
"DMG, H3 K28", "DMG, H3 K28, TP53",
"DHG, H3 G35", "DHG, H3 G35, TP53",
"HGG, IDH, TP53", "HGG, PXA", "HGG, PXA, TP53",
"IHG, ALK-altered", "IHG, NTRK-altered",
"IHG, ROS1-altered"
))) %>%
dplyr::filter(!is.na(OS_days) | !is.na(EFS_days))
Warning: There was 1 warning in `dplyr::mutate()`.
ℹ In argument: `mol_sub_group = fct_relevel(...)`.
Caused by warning:
! 1 unknown level in `f`: HGG, PXA, TP53
retain_clusters_hgg <- hgg %>%
dplyr::count(cluster) %>%
filter(n >= 10) %>%
pull(cluster)
hgg <- hgg %>%
filter(cluster %in% retain_clusters_hgg) %>%
dplyr::mutate(cluster = factor(cluster)) %>%
dplyr::mutate(SI_group = case_when(
SBI > summary(SBI)["3rd Qu."] ~ "High SBI",
SBI < summary(SBI)["1st Qu."] ~ "Low SBI",
TRUE ~ NA_character_
)) %>%
dplyr::mutate(spliceosome_group = case_when(
spliceosome_gsva_score > summary(spliceosome_gsva_score)["3rd Qu."] ~ "Splice GSVA 4th Q",
spliceosome_gsva_score > summary(spliceosome_gsva_score)["Median"] ~ "Splice GSVA 3rd Q",
spliceosome_gsva_score > summary(spliceosome_gsva_score)["1st Qu."] ~ "Splice GSVA 2nd Q",
TRUE ~ "Splice GSVA 1st Q"
)) %>%
dplyr::mutate(SI_group = fct_relevel(SI_group,
c("High SBI", "Low SBI"))) %>%
dplyr::mutate(spliceosome_group = fct_relevel(spliceosome_group,
c("Splice GSVA 1st Q",
"Splice GSVA 2nd Q",
"Splice GSVA 3rd Q",
"Splice GSVA 4th Q")))
Generate HGG KM models with spliceosome_group as
covariate
# Generate kaplan meier survival models for OS and EFS, and save outputs
hgg_kap_os <- survival_analysis(
metadata = hgg %>% dplyr::filter(!is.na(spliceosome_group)),
ind_var = "spliceosome_group",
test = "kap.meier",
metadata_sample_col = "Kids_First_Biospecimen_ID",
days_col = "OS_days",
status_col = "OS_status"
)
Testing model: survival::Surv(OS_days, OS_status) ~ spliceosome_group with kap.meier
readr::write_rds(hgg_kap_os,
file.path(results_dir, "logrank_hgg_OS_splice_group.RDS"))
hgg_kap_efs <- survival_analysis(
metadata = hgg %>% dplyr::filter(!is.na(spliceosome_group)),
ind_var = "spliceosome_group",
test = "kap.meier",
metadata_sample_col = "Kids_First_Biospecimen_ID",
days_col = "EFS_days",
status_col = "EFS_status"
)
Testing model: survival::Surv(EFS_days, EFS_status) ~ spliceosome_group with kap.meier
readr::write_rds(hgg_kap_efs,
file.path(results_dir, "logrank_hgg_EFS_splice_group.RDS"))
Generate KM plots
km_hgg_os_plot <- plotKM(model = hgg_kap_os,
variable = "spliceosome_group",
combined = F,
title = "HGG, overall survival",
p_pos = "topright")
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
ggsave(file.path(plot_dir, "km_hgg_OS_spliceosome_score.pdf"),
km_hgg_os_plot,
width = 9, height = 5, units = "in",
device = "pdf")
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
Warning: No shared levels found between `names(values)` of the manual scale and the data's colour values.
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
km_hgg_efs_plot <- plotKM(model = hgg_kap_efs,
variable = "spliceosome_group",
combined = F,
title = "HGG, event-free survival",
p_pos = "topright")
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
ggsave(file.path(plot_dir, "km_hgg_EFS_spliceosome_score.pdf"),
km_hgg_efs_plot,
width = 9, height = 5, units = "in",
device = "pdf")
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
Warning: No shared levels found between `names(values)` of the manual scale and the data's colour values.
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
Generate coxph models for HGG including covariates
mol_sub_group cluster, and SBI,
and plot
add_model_hgg_os <- fit_save_model(hgg,
terms = "mol_sub_group+cluster+age_at_diagnosis_years+SBI",
file.path(results_dir, "cox_hgg_OS_additive_terms_subtype_cluster_SBI.RDS"),
"multivariate",
years_col = "OS_years",
status_col = "OS_status")
Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
Loglik converged before variable 9 ; coefficient may be infinite.
forest_hgg_os <- plotForest(readRDS(file.path(results_dir, "cox_hgg_OS_additive_terms_subtype_cluster_SBI.RDS")))
Warning: Removed 2 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Warning: Removed 2 rows containing missing values or values outside the scale range (`geom_text()`).
forest_hgg_os
ggsave(file.path(plot_dir, "forest_add_OS_HGG_subtype_cluster_assignment_SBI.pdf"),
forest_hgg_os,
width = 9, height = 5, units = "in",
device = "pdf")
add_model_hgg_efs <- fit_save_model(hgg,
terms = "mol_sub_group+cluster+age_at_diagnosis_years+SBI",
file.path(results_dir, "cox_hgg_EFS_additive_terms_subtype_cluster_SBI.RDS"),
"multivariate",
years_col = "EFS_years",
status_col = "EFS_status")
forest_hgg_efs <- plotForest(readRDS(file.path(results_dir, "cox_hgg_EFS_additive_terms_subtype_cluster_SBI.RDS")))
Warning: Removed 2 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Warning: Removed 2 rows containing missing values or values outside the scale range (`geom_text()`).
ggsave(file.path(plot_dir, "forest_add_EFS_HGG_subtype_cluster_assignment_SBI.pdf"),
forest_hgg_efs,
width = 9, height = 5, units = "in",
device = "pdf")
Repeat analysis replacing SBI with
spliceosome_gsva_score
add_model_hgg_os <- fit_save_model(hgg,
terms = "mol_sub_group+cluster+age_at_diagnosis_years+spliceosome_gsva_score",
file.path(results_dir, "cox_hgg_OS_additive_terms_subtype_cluster_spliceosome_score.RDS"),
"multivariate",
years_col = "OS_years",
status_col = "OS_status")
Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
Loglik converged before variable 9 ; coefficient may be infinite.
forest_hgg_os <- plotForest(readRDS(file.path(results_dir, "cox_hgg_OS_additive_terms_subtype_cluster_spliceosome_score.RDS")))
Warning: Removed 2 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Warning: Removed 2 rows containing missing values or values outside the scale range (`geom_text()`).
forest_hgg_os
ggsave(file.path(plot_dir, "forest_add_OS_HGG_subtype_cluster_assignment_spliceosome_score.pdf"),
forest_hgg_os,
width = 9, height = 5, units = "in",
device = "pdf")
add_model_hgg_efs <- fit_save_model(hgg,
terms = "mol_sub_group+cluster+age_at_diagnosis_years+spliceosome_gsva_score",
file.path(results_dir, "cox_hgg_EFS_additive_terms_subtype_cluster_spliceosome_score.RDS"),
"multivariate",
years_col = "EFS_years",
status_col = "EFS_status")
forest_hgg_efs <- plotForest(readRDS(file.path(results_dir, "cox_hgg_EFS_additive_terms_subtype_cluster_spliceosome_score.RDS")))
Warning: Removed 2 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Warning: Removed 2 rows containing missing values or values outside the scale range (`geom_text()`).
ggsave(file.path(plot_dir, "forest_add_EFS_HGG_subtype_cluster_assignment_spliceosome_score.pdf"),
forest_hgg_efs,
width = 9, height = 5, units = "in",
device = "pdf")
Filter for cluster 6
cluster6_df <- metadata %>%
dplyr::filter(cluster == "Cluster 6",
!is.na(EFS_days)) %>%
dplyr::mutate(SI_group = case_when(
SBI > summary(SBI)["3rd Qu."] ~ "High SBI",
SBI < summary(SBI)["1st Qu."] ~ "Low SBI",
TRUE ~ NA_character_
)) %>%
dplyr::mutate(spliceosome_group = case_when(
spliceosome_gsva_score > summary(spliceosome_gsva_score)["3rd Qu."] ~ "Splice GSVA 4th Q",
spliceosome_gsva_score > summary(spliceosome_gsva_score)["Median"] ~ "Splice GSVA 3rd Q",
spliceosome_gsva_score > summary(spliceosome_gsva_score)["1st Qu."] ~ "Splice GSVA 2nd Q",
TRUE ~ "Splice GSVA 1st Q"
)) %>%
dplyr::mutate(SI_group = fct_relevel(SI_group,
c("High SBI", "Low SBI"))) %>%
dplyr::mutate(spliceosome_group = fct_relevel(spliceosome_group,
c("Splice GSVA 1st Q",
"Splice GSVA 2nd Q",
"Splice GSVA 3rd Q",
"Splice GSVA 4th Q")))
Generate KM models with SI_group as covariate
# Generate kaplan meier survival models for OS and EFS, and save outputs
c6_si_kap_os <- survival_analysis(
metadata = cluster6_df %>% dplyr::filter(!is.na(SI_group)),
ind_var = "SI_group",
test = "kap.meier",
metadata_sample_col = "Kids_First_Biospecimen_ID",
days_col = "OS_days",
status_col = "OS_status"
)
Testing model: survival::Surv(OS_days, OS_status) ~ SI_group with kap.meier
readr::write_rds(c6_si_kap_os,
file.path(results_dir, "logrank_cluster6_OS_SBI.RDS"))
c6_si_kap_efs <- survival_analysis(
metadata = cluster6_df %>% dplyr::filter(!is.na(SI_group)),
ind_var = "SI_group",
test = "kap.meier",
metadata_sample_col = "Kids_First_Biospecimen_ID",
days_col = "EFS_days",
status_col = "EFS_status"
)
Testing model: survival::Surv(EFS_days, EFS_status) ~ SI_group with kap.meier
readr::write_rds(c6_si_kap_efs,
file.path(results_dir, "logrank_cluster6_EFS_SBI.RDS"))
Generate Cluster 6 KM SI_group plots
km_c6_si_os_plot <- plotKM(model = c6_si_kap_os,
variable = "SI_group",
combined = F,
title = "Cluster 6, overall survival",
p_pos = "topright")
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
ggsave(file.path(plot_dir, "km_cluster6_OS_sbi_group.pdf"),
km_c6_si_os_plot,
width = 8, height = 5, units = "in",
device = "pdf")
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
Warning: No shared levels found between `names(values)` of the manual scale and the data's colour values.
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
km_c6_si_efs_plot <- plotKM(model = c6_si_kap_efs,
variable = "SI_group",
combined = F,
title = "Cluster 6, event-free survival",
p_pos = "topright")
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
ggsave(file.path(plot_dir, "km_cluster6_EFS_sbi_group.pdf"),
km_c6_si_efs_plot,
width = 8, height = 5, units = "in",
device = "pdf")
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
Warning: No shared levels found between `names(values)` of the manual scale and the data's colour values.
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
Generate KM models with spliceosome_group as
covariate
# Generate kaplan meier survival models for OS and EFS, and save outputs
c6_splice_kap_os <- survival_analysis(
metadata = cluster6_df %>%
dplyr::filter(spliceosome_group %in% c("Splice GSVA 4th Q", "Splice GSVA 1st Q")) %>%
dplyr::mutate(spliceosome_group = factor(spliceosome_group,
levels = c("Splice GSVA 1st Q", "Splice GSVA 4th Q"))),
ind_var = "spliceosome_group",
test = "kap.meier",
metadata_sample_col = "Kids_First_Biospecimen_ID",
days_col = "OS_days",
status_col = "OS_status"
)
Testing model: survival::Surv(OS_days, OS_status) ~ spliceosome_group with kap.meier
readr::write_rds(c6_splice_kap_os,
file.path(results_dir, "logrank_cluster6_OS_splice_group.RDS"))
c6_splice_kap_efs <- survival_analysis(
metadata = cluster6_df %>%
dplyr::filter(spliceosome_group %in% c("Splice GSVA 4th Q", "Splice GSVA 1st Q")) %>%
dplyr::mutate(spliceosome_group = factor(spliceosome_group,
levels = c("Splice GSVA 1st Q", "Splice GSVA 4th Q"))),
ind_var = "spliceosome_group",
test = "kap.meier",
metadata_sample_col = "Kids_First_Biospecimen_ID",
days_col = "EFS_days",
status_col = "EFS_status"
)
Testing model: survival::Surv(EFS_days, EFS_status) ~ spliceosome_group with kap.meier
readr::write_rds(c6_splice_kap_efs,
file.path(results_dir, "logrank_cluster6_EFS_splice_group.RDS"))
Generate Cluster 6 KM spliceosome_group plots
km_c6_splice_os_plot <- plotKM(model = c6_splice_kap_os,
variable = "spliceosome_group",
combined = F,
title = "Cluster 6, overall survival",
p_pos = "topright")
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
ggsave(file.path(plot_dir, "km_cluster6_OS_splice_group.pdf"),
km_c6_splice_os_plot,
width = 9, height = 5, units = "in",
device = "pdf")
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
Warning: No shared levels found between `names(values)` of the manual scale and the data's colour values.
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
km_c6_splice_efs_plot <- plotKM(model = c6_splice_kap_efs,
variable = "spliceosome_group",
combined = F,
title = "Cluster 6, event-free survival",
p_pos = "topright")
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
ggsave(file.path(plot_dir, "km_cluster6_EFS_splice_group.pdf"),
km_c6_splice_efs_plot,
width = 9, height = 5, units = "in",
device = "pdf")
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
Warning: No shared levels found between `names(values)` of the manual scale and the data's colour values.
Warning: No shared levels found between `names(values)` of the manual scale and the data's fill values.
Assess EFS and OS by SBI or spliceosome GSVA score in multivariate models and generate forest plots
add_model_c6_efs <- fit_save_model(cluster6_df %>%
dplyr::filter(extent_of_tumor_resection != "Unavailable",
spliceosome_group %in% c("Splice GSVA 4th Q", "Splice GSVA 1st Q")) %>%
dplyr::mutate(Histology = fct_relevel(Histology,
c("Other high-grade glioma", "Atypical Teratoid Rhabdoid Tumor",
"DIPG or DMG", "Ependymoma", "Mesenchymal tumor",
"Other CNS embryonal tumor", "Low-grade glioma"))),
terms = "extent_of_tumor_resection+age_at_diagnosis_years+Histology+spliceosome_group",
file.path(results_dir, "cox_hgg_EFS_additive_terms_subtype_cluster_spliceosome_score.RDS"),
"multivariate",
years_col = "EFS_years",
status_col = "EFS_status")
Warning: There was 1 warning in `dplyr::mutate()`.
ℹ In argument: `Histology = fct_relevel(...)`.
Caused by warning:
! 2 unknown levels in `f`: Ependymoma and Low-grade glioma
forest_c6_spliceosome_efs <- plotForest(readRDS(file.path(results_dir, "cox_hgg_EFS_additive_terms_subtype_cluster_spliceosome_score.RDS")))
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_text()`).
ggsave(file.path(plot_dir, "forest_add_EFS_cluster6_histology_resection_spliceosome_group.pdf"),
forest_c6_spliceosome_efs,
width = 9, height = 4, units = "in",
device = "pdf")
add_model_c6_os <- fit_save_model(cluster6_df %>%
dplyr::filter(!extent_of_tumor_resection %in% c("Not Reported", "Unavailable")) %>%
dplyr::mutate(Histology = fct_relevel(Histology,
c("Other high-grade glioma", "Atypical Teratoid Rhabdoid Tumor",
"DIPG or DMG", "Ependymoma", "Mesenchymal tumor",
"Other CNS embryonal tumor", "Low-grade glioma"))),
terms = "extent_of_tumor_resection+age_at_diagnosis_years+Histology+SBI",
file.path(results_dir, "cox_hgg_OS_additive_terms_subtype_cluster_si_group.RDS"),
"multivariate",
years_col = "OS_years",
status_col = "OS_status")
Warning in coxph.fit(X, Y, istrat, offset, init, control, weights = weights, :
Loglik converged before variable 9 ; coefficient may be infinite.
forest_c6_si_os <- plotForest(readRDS(file.path(results_dir, "cox_hgg_OS_additive_terms_subtype_cluster_si_group.RDS")))
Warning: Removed 2 rows containing missing values or values outside the scale range (`geom_errorbarh()`).
Warning: Removed 2 rows containing missing values or values outside the scale range (`geom_text()`).
ggsave(file.path(plot_dir, "forest_add_OS_cluster6_histology_resection_si.pdf"),
forest_c6_si_os,
width = 9, height = 4, units = "in",
device = "pdf")
Print session info
sessionInfo()
R version 4.4.0 (2024-04-24)
Platform: x86_64-pc-linux-gnu
Running under: Ubuntu 22.04.4 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
time zone: Etc/UTC
tzcode source: system (glibc)
attached base packages:
[1] grid stats graphics grDevices utils datasets methods base
other attached packages:
[1] ggforce_0.4.2 RColorBrewer_1.1-3 viridis_0.6.5 viridisLite_0.4.2 ggtext_0.1.2
[6] data.table_1.16.4 vroom_1.6.5 reshape2_1.4.4 ggthemes_5.1.0 circlize_0.4.16
[11] ComplexHeatmap_2.20.0 gtools_3.9.5 survminer_0.4.9 patchwork_1.2.0 ggpubr_0.6.0
[16] survival_3.7-0 lubridate_1.9.4 forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
[21] purrr_1.0.4 readr_2.1.5 tidyr_1.3.1 tibble_3.2.1 ggplot2_3.5.1
[26] tidyverse_2.0.0
loaded via a namespace (and not attached):
[1] gridExtra_2.3 rlang_1.1.5 magrittr_2.0.3 clue_0.3-65 GetoptLong_1.0.5
[6] matrixStats_1.3.0 compiler_4.4.0 png_0.1-8 systemfonts_1.2.1 vctrs_0.6.5
[11] pkgconfig_2.0.3 shape_1.4.6.1 crayon_1.5.3 fastmap_1.2.0 backports_1.5.0
[16] labeling_0.4.3 KMsurv_0.1-5 utf8_1.2.4 rmarkdown_2.29 markdown_1.13
[21] tzdb_0.4.0 ragg_1.3.3 bit_4.5.0.1 xfun_0.50 cachem_1.1.0
[26] jsonlite_1.8.9 tweenr_2.0.3 broom_1.0.7 parallel_4.4.0 cluster_2.1.8
[31] R6_2.6.1 bslib_0.9.0 stringi_1.8.4 car_3.1-2 jquerylib_0.1.4
[36] Rcpp_1.0.14 iterators_1.0.14 knitr_1.49 zoo_1.8-12 IRanges_2.38.1
[41] Matrix_1.7-2 splines_4.4.0 timechange_0.3.0 tidyselect_1.2.1 yaml_2.3.10
[46] rstudioapi_0.17.1 abind_1.4-5 doParallel_1.0.17 codetools_0.2-20 lattice_0.22-6
[51] plyr_1.8.9 withr_3.0.2 evaluate_1.0.3 polyclip_1.10-6 xml2_1.3.6
[56] survMisc_0.5.6 pillar_1.10.1 colorblindr_0.1.0 carData_3.0-5 foreach_1.5.2
[61] stats4_4.4.0 generics_0.1.3 rprojroot_2.0.4 S4Vectors_0.42.1 hms_1.1.3
[66] commonmark_1.9.2 munsell_0.5.1 scales_1.3.0 xtable_1.8-4 glue_1.8.0
[71] tools_4.4.0 ggsignif_0.6.4 cowplot_1.1.3 colorspace_2.1-1 cli_3.6.4
[76] km.ci_0.5-6 textshaping_1.0.0 gtable_0.3.6 rstatix_0.7.2 sass_0.4.9
[81] digest_0.6.37 BiocGenerics_0.50.0 rjson_0.2.21 farver_2.1.2 htmltools_0.5.8.1
[86] lifecycle_1.0.4 GlobalOptions_0.1.2 gridtext_0.1.5 bit64_4.6.0-1 MASS_7.3-64